##############################################################################
#_______________________________ FBA Shares _______________________________#
##############################################################################

# This script uses average share price and volume among TAQ symbols in 2015 to estimate
# the market share taken by a Discrete (or FBA) exchange, using the latency arbitrage
# tax estimate from Aquilina, Budish and O’Neill (2022).
#
# This script uses the dataset "Symbol_Universe_2015.csv", which is a symbol-level 
# dataset containing symbol characteristics including average price and average 
# daily volume. Average price is volume-weighted average price, calculated
# by dividing ADV ($) by ADV (in shares).
#
# This script outputs a table that calculates, for each net fee the FBA
# exchange could charge, the market share and the revenue accrued to the exchange.
#
# (1) FBA Share (% of Share Volume)
# (2) FBA Share (% of Dollar Volume)
# (3) FBA Annual Revenue ($ Millions)
#
# We filter this dataset to only include symbols with the following conditions:
# (i) Symbol average share price > $5
# (ii) Symbol trades continuously throughout the year (f_Non_Trading_Days == 0)
#
# We then calculate the FBA share per symbol using the latency arbitrage tax
# estimate and the symbol's volume share. We calculate the overall FBA market share
# by summing up the symbol-level market shares. We calculate the revenue for
# the exchange as the fee * FBA share * total annual share volume of all symbols.

################################################################################
#_______________________________ Load Libraries _______________________________#
################################################################################

library(data.table)
library(dplyr)
library(scales)

#############################################################################
#_______________________________ Directories _______________________________#
#############################################################################

home.dir <- "~/Dropbox/projects/excomp/replication_materials/Analysis_of_TAQ_data/FBA_Shares/"
data.dir <- file.path(home.dir, 'Data')
fig.dir <- file.path(home.dir, 'Figures')
table.dir <- file.path(home.dir, 'Tables')
log.dir <- file.path(home.dir, 'Logs')

###########################################################################
#_______________________________ Load Data _______________________________#
###########################################################################

# Load the symbol universe file, which contains the average price and volume
# for all symbols in the TAQ universe in 2015.
# Note that this data is also merged with the CRSP datasets.
symbol_universe <- data.table::fread(file.path(data.dir, "Symbol_Universe_2015.csv"))

##################################################################################
#_______________________________ Hardcoded Values _______________________________#
##################################################################################

# Hardcoded values
PERCENT_TO_DECIMAL <- 0.01
DECIMAL_TO_BASIS_POINTS <- 10000
PENNIES_TO_DOLLARS <- 0.01
MILLS_TO_DOLLARS <- 0.0001
NUM_TRADING_DAYS_IN_2015 <- 252

# Latency Arbitrage Tax
ARB_TAX_DECIMAL <- 0.0042 * PERCENT_TO_DECIMAL

#############################################################################################
#_______________________________ Load Symbol Data and Filter _______________________________#
#############################################################################################

# We apply the following filters to obtain the symbol sample
# (1) Share price > $5.
# (2) Trades continuously throughout the year under the same symbol.
symbol_filters <- symbol_universe$Avg_Price > 5 &
                  symbol_universe$f_Non_Trading_Days == 0

symbol_sample <- symbol_universe[symbol_filters,]

# Print symbol characteristics
cat("Symbol Characteristics:", "\n")
cat("Percent of symbols: ", nrow(symbol_sample) / nrow(symbol_universe), "\n")
cat("Percent of share volume: ", sum(symbol_sample$ADV_S) / sum(symbol_universe$ADV_S, na.rm = TRUE), "\n")

# Create symbol variable
symbol_sample$symbol <- ifelse(is.na(symbol_sample$SYM_SUFFIX), symbol_sample$SYM_ROOT,
                               paste(symbol_sample$SYM_ROOT, symbol_sample$SYM_SUFFIX, sep = "_"))
# Remove periods in symbol variable.
symbol_sample$symbol <- gsub(".","", symbol_sample$symbol, fixed = TRUE)

# Extract symbol, price, and volume information
volume_price_sym <- symbol_sample[,c("SYM_ROOT", "SYM_SUFFIX", "symbol", "Avg_Price", "ADV_S", "ADV_D")]

# Export
volume_price_sym <- volume_price_sym[order(-volume_price_sym$Avg_Price),]
data.table::fwrite(volume_price_sym, file.path(table.dir, "symbol_level_volume_price.csv"))

##############################################################################
#_______________________________ Test Symbols _______________________________#
##############################################################################

# 2015 TAQ Master file
taq_master <- data.table::fread(file.path(data.dir, "taq_all_master_caps_20150102.csv"))
test_symbols_2015 <- taq_master[grepl("test", taq_master$SEC_DESC, ignore.case = TRUE),]

test_symbols_2015[test_symbols_2015$SYMBOL_SUFFIX == "TEST",]$SYMBOL_SUFFIX <- "T"
test_symbols_2015$symbol <- ifelse(is.na(test_symbols_2015$SYMBOL_SUFFIX), test_symbols_2015$SYMBOL_ROOT,
                              paste(test_symbols_2015$SYMBOL_ROOT, test_symbols_2015$SYMBOL_SUFFIX, sep = "_"))

# Intersection
print("Test symbols in the dataset:")
print(volume_price_sym[volume_price_sym$symbol %in% test_symbols_2015$symbol,])
cat("\n")

# Export
test_symbols_2015 <- test_symbols_2015[,c("SYMBOL_ROOT", "SYMBOL_SUFFIX", "SEC_DESC")]
data.table::fwrite(test_symbols_2015, file.path(table.dir, "test_symbols_in_subset_20150102.csv"))

# # 2017 TAQ Master file
# taq_master_2017 <- data.table::fread(file.path(data.dir, "taq_master_20170103.csv"))
# test_symbols_2017 <- taq_master_2017[taq_master_2017$TEST_SYMBOL_FLAG == "Y",]
# test_symbols_2017[test_symbols_2017$SYM_SUFFIX == "TEST",]$SYM_SUFFIX <- "T"
# test_symbols_2017$symbol <- ifelse(is.na(test_symbols_2017$SYM_SUFFIX), test_symbols_2017$SYM_ROOT,
#                                    paste(test_symbols_2017$SYM_ROOT, test_symbols_2017$SYM_SUFFIX, sep = "_"))
# 
# # Intersection
# print("Test symbols in the dataset:")
# print(volume_price_sym[volume_price_sym$symbol %in% test_symbols_2017$symbol,])
# cat("\n")
# 
# # Export Test Symbols
# test_symbols_2017 <- test_symbols_2017[,c("SYM_ROOT", "SYM_SUFFIX", "SEC_DESC")]
# data.table::fwrite(test_symbols_2017, file.path(table.dir, "test_symbols_in_subset_20170103.csv"))

##########################################################################################
#_______________________________ Calculate Percent Volume _______________________________#
##########################################################################################

# Calculate each symbol's share of total average share and dollar volume (out of all symbols in the sample)
volume_price_sym$pct_volume_s <- volume_price_sym$ADV_S / sum(volume_price_sym$ADV_S)
volume_price_sym$pct_volume_d <- volume_price_sym$ADV_D / sum(volume_price_sym$ADV_D)

####################################################################################################
#_______________________________ Create Fee/FBA Share/Revenue Table _______________________________#
####################################################################################################

# Create a grid of fees (1, 2, 3,... 9, 10, 15, 20,... 55, 60)
fees_mills <- c(seq(0, 10, 1), seq(15, 60, 5))

# Initialize empty table
fee_fba_share_revenue_table <- data.frame(fba_pct_of_share_volume = numeric(0),
                                          fba_pct_of_dollar_volume = numeric(0),
                                          revenue = numeric(0))

# For each fee, we produce a row for:
# (1) FBA Share (% of Share Volume)
# (2) FBA Share (% of Dollar Volume)
# (3) FBA Annual Revenue ($ Millions)
for (fee_mills in fees_mills) {

    # Create temporary copy of the symbol-level dataset.
    volume_price_sym_temp <- volume_price_sym

    # Calculate Fee in terms of dollars instead of mills.
    fee_dollars <- (fee_mills * MILLS_TO_DOLLARS)

    # Calculate the FBA Share (without truncation).
    volume_price_sym_temp$fba_share <- ((volume_price_sym_temp$Avg_Price * ARB_TAX_DECIMAL) - fee_dollars) / PENNIES_TO_DOLLARS
    # Truncate with upper bound equal to 1 and lower bound equal to 0.
    volume_price_sym_temp$fba_share <- pmax(pmin(volume_price_sym_temp$fba_share, 1), 0)

    # Calculate FBA Share (% of Share Volume and Dollar Volume) for each symbol.
    volume_price_sym_temp$fba_share_volume_s <- volume_price_sym_temp$pct_volume_s * volume_price_sym_temp$fba_share
    volume_price_sym_temp$fba_share_volume_d <- volume_price_sym_temp$pct_volume_d * volume_price_sym_temp$fba_share

    # Cumulative FBA Share (% of Share Volume) for all symbols in sample.
    fba_share_volume_s_all_symbols <- sum(volume_price_sym_temp$fba_share_volume_s)

    # Cumulative FBA Share (% of Dollar Volume) for all symbols in sample.
    fba_share_volume_d_all_symbols <- sum(volume_price_sym_temp$fba_share_volume_d)

    # Revenue the FBA exchange receives for each symbol.
    symbol_revenue <- fee_dollars * volume_price_sym_temp$fba_share * volume_price_sym_temp$ADV_S * NUM_TRADING_DAYS_IN_2015
    # Calculate total revenue by summing symbol-level revenue.
    total_revenue <- sum(symbol_revenue)

    # Append row to table.
    fee_fba_share_revenue_table <- rbind(fee_fba_share_revenue_table, c(fba_share_volume_s_all_symbols,
                                                                        fba_share_volume_d_all_symbols,
                                                                        total_revenue))
}

# Add the fees columns, change the column names.
fee_fba_share_revenue_table <-cbind(as.matrix(fees_mills), fee_fba_share_revenue_table)
colnames(fee_fba_share_revenue_table) <- c("Fee (Mills)",
                                           "FBA Share (% of Share Volume)",
                                           "FBA Share (% of Dollar Volume)",
                                           "FBA Annual Revenue ($ Millions)")

# Export table.
data.table::fwrite(fee_fba_share_revenue_table, file.path(table.dir, "fee_fba_share_revenue_table.csv"))
